/*
 * explicit_instantiation_macros.h
 *
 * L. Mazur, 28 Dec 2018
 *
 * Header that initializes preprocessor switches and helper macros used to explicitly instantiate templates. This is
 * needed because we do not necessarily want to compile everything; we can save compile time by compiling only the
 * precisions, halo sizes, etc. that are required for a particular program. Unfortunately, macros have to be defined
 * from the bottom up. This means that you should read each macro chain starting from the define at the bottom and
 * then move upwards. For the simplest example, look at the precision/halo definition.
 *
 */

#pragma once
#include "preprocessorWrapper.h"


///  ======== NO_GPU ===========

// Default NO_GPU to 0 (false) when the build (CMakeLists) did not define it.
#if !defined(NO_GPU)
#  define NO_GPU 0
#endif


///  ======== PRECISION ========

// Precision switches: every switch the build did not define defaults to 0.
#if !defined(DOUBLEPREC)
#  define DOUBLEPREC 0
#endif

#if !defined(SINGLEPREC)
#  define SINGLEPREC 0
#endif

#if !defined(HALFPREC)
#  define HALFPREC 0
#endif


///  ======== HALODEPTH ========

// MAXHALO matches the largest HALODEPTH_n switch handled below.
#define MAXHALO 4

// One switch per halo depth, visited in ascending order:
//  - HALODEPTH_n not defined by the build -> default it to 0;
//  - HALODEPTH_n defined by the build     -> (re)define MAXUSEDHALO as n.
// Because the depths are visited in ascending order, MAXUSEDHALO ends up as the largest n whose
// HALODEPTH_n was defined. The test is "is defined", not "is non-zero". MAXUSEDHALO is left
// untouched by this block when no HALODEPTH_n was defined.

#ifndef HALODEPTH_0
#  define HALODEPTH_0 0
#else
#  undef MAXUSEDHALO
#  define MAXUSEDHALO 0
#endif

#ifndef HALODEPTH_1
#  define HALODEPTH_1 0
#else
#  undef MAXUSEDHALO
#  define MAXUSEDHALO 1
#endif

#ifndef HALODEPTH_2
#  define HALODEPTH_2 0
#else
#  undef MAXUSEDHALO
#  define MAXUSEDHALO 2
#endif

#ifndef HALODEPTH_3
#  define HALODEPTH_3 0
#else
#  undef MAXUSEDHALO
#  define MAXUSEDHALO 3
#endif

#ifndef HALODEPTH_4
#  define HALODEPTH_4 0
#else
#  undef MAXUSEDHALO
#  define MAXUSEDHALO 4
#endif

///  ======== HALODEPTHSPIN ========

// Spinor halo-depth switches: every switch the build did not define defaults to 0.
#if !defined(HALODEPTHSPIN_0)
#  define HALODEPTHSPIN_0 0
#endif

#if !defined(HALODEPTHSPIN_1)
#  define HALODEPTHSPIN_1 0
#endif

#if !defined(HALODEPTHSPIN_2)
#  define HALODEPTHSPIN_2 0
#endif

#if !defined(HALODEPTHSPIN_3)
#  define HALODEPTHSPIN_3 0
#endif

#if !defined(HALODEPTHSPIN_4)
#  define HALODEPTHSPIN_4 0
#endif

///  ======== COMPRESSION ======

// Compression types. The enumerator names are the tokens that the COMP_LOOP_* macros in this
// file pass on to INIT_TEMPLATES.
enum CompressionType {
    // SU3
    R12,
    // SU3 with staggered phases. At the moment this should never be used!
    STAGG_R12,
    // A normal U3 matrix
    U3R14,
    // Real number * U3 = complex * SU3 (might be more expensive than R18 on some architectures)
    R14,
    // Full matrix without any compression
    R18
};


// Compression switches: every switch the build did not define defaults to 0.
#if !defined(COMP_R12)
#  define COMP_R12 0
#endif

#if !defined(COMP_STAGGR12)
#  define COMP_STAGGR12 0
#endif

#if !defined(COMP_R14)
#  define COMP_R14 0
#endif

#if !defined(COMP_U3R14)
#  define COMP_U3R14 0
#endif

#if !defined(COMP_R18)
#  define COMP_R18 0
#endif


///  ======== LAYOUT ===========

// Layout switches: every switch the build did not define defaults to 0.
#if !defined(LAYOUT_ALL)
#  define LAYOUT_ALL 0
#endif

#if !defined(LAYOUT_EVEN)
#  define LAYOUT_EVEN 0
#endif

#if !defined(LAYOUT_ODD)
#  define LAYOUT_ODD 0
#endif


///  ======== STACKS ===========

// Stack-count switches: every switch the build did not define defaults to 0.
// Switches exist only for the stack counts 1-12, 14, 15, 16, 18, 20 and 32.
#if !defined(NSTACKS_1)
#  define NSTACKS_1 0
#endif

#if !defined(NSTACKS_2)
#  define NSTACKS_2 0
#endif

#if !defined(NSTACKS_3)
#  define NSTACKS_3 0
#endif

#if !defined(NSTACKS_4)
#  define NSTACKS_4 0
#endif

#if !defined(NSTACKS_5)
#  define NSTACKS_5 0
#endif

#if !defined(NSTACKS_6)
#  define NSTACKS_6 0
#endif

#if !defined(NSTACKS_7)
#  define NSTACKS_7 0
#endif

#if !defined(NSTACKS_8)
#  define NSTACKS_8 0
#endif

#if !defined(NSTACKS_9)
#  define NSTACKS_9 0
#endif

#if !defined(NSTACKS_10)
#  define NSTACKS_10 0
#endif

#if !defined(NSTACKS_11)
#  define NSTACKS_11 0
#endif

#if !defined(NSTACKS_12)
#  define NSTACKS_12 0
#endif

#if !defined(NSTACKS_14)
#  define NSTACKS_14 0
#endif

#if !defined(NSTACKS_15)
#  define NSTACKS_15 0
#endif

#if !defined(NSTACKS_16)
#  define NSTACKS_16 0
#endif

#if !defined(NSTACKS_18)
#  define NSTACKS_18 0
#endif

#if !defined(NSTACKS_20)
#  define NSTACKS_20 0
#endif

#if !defined(NSTACKS_32)
#  define NSTACKS_32 0
#endif



///  ======== STACKS BLOCK ===========

// Stack-block switches. NSTACKS_BLOCK_1 defaults to 1 (enabled) when the build did not define
// it; NSTACKS_BLOCK_2 ... NSTACKS_BLOCK_8 default to 0.
#if !defined(NSTACKS_BLOCK_1)
#  define NSTACKS_BLOCK_1 1
#endif

#if !defined(NSTACKS_BLOCK_2)
#  define NSTACKS_BLOCK_2 0
#endif

#if !defined(NSTACKS_BLOCK_3)
#  define NSTACKS_BLOCK_3 0
#endif

#if !defined(NSTACKS_BLOCK_4)
#  define NSTACKS_BLOCK_4 0
#endif

#if !defined(NSTACKS_BLOCK_5)
#  define NSTACKS_BLOCK_5 0
#endif

#if !defined(NSTACKS_BLOCK_6)
#  define NSTACKS_BLOCK_6 0
#endif

#if !defined(NSTACKS_BLOCK_7)
#  define NSTACKS_BLOCK_7 0
#endif

#if !defined(NSTACKS_BLOCK_8)
#  define NSTACKS_BLOCK_8 0
#endif


/// =================== Initialize everything ===================================================
///
/// Read from the bottom up: INIT_ALL -> DEVICE_LOOP_ALL -> FLOAT_LOOP_ALL -> HALO_LOOP_ALL -> COMP_LOOP_ALL.
/// Each level adds one argument and guards every line with IF(BOOL(<switch>)). IF, BOOL and NOT are not defined
/// in this file; presumably they come from preprocessorWrapper.h and drop the guarded text when the switch is 0.

/// 5. Innermost level: one INIT_TEMPLATES(onDevice, floatT, HALO_DEPTH, <compression>) call per COMP_* switch.
#define COMP_LOOP_ALL(INIT_TEMPLATES, onDevice, floatT, HALO_DEPTH) \
    IF(BOOL(COMP_R12))      (INIT_TEMPLATES(onDevice, floatT, HALO_DEPTH, R12)) \
    IF(BOOL(COMP_STAGGR12)) (INIT_TEMPLATES(onDevice, floatT, HALO_DEPTH, STAGG_R12)) \
    IF(BOOL(COMP_R14))      (INIT_TEMPLATES(onDevice, floatT, HALO_DEPTH, R14)) \
    IF(BOOL(COMP_R18))      (INIT_TEMPLATES(onDevice, floatT, HALO_DEPTH, R18)) \
    IF(BOOL(COMP_U3R14))    (INIT_TEMPLATES(onDevice, floatT, HALO_DEPTH, U3R14))

/// 4. Passes the halo depth 0..4 to COMP_LOOP_ALL, one line per HALODEPTH_n switch.
#define HALO_LOOP_ALL(INIT_TEMPLATES, onDevice, floatT) \
    IF(BOOL(HALODEPTH_0)) (COMP_LOOP_ALL(INIT_TEMPLATES, onDevice, floatT, 0)) \
    IF(BOOL(HALODEPTH_1)) (COMP_LOOP_ALL(INIT_TEMPLATES, onDevice, floatT, 1)) \
    IF(BOOL(HALODEPTH_2)) (COMP_LOOP_ALL(INIT_TEMPLATES, onDevice, floatT, 2)) \
    IF(BOOL(HALODEPTH_3)) (COMP_LOOP_ALL(INIT_TEMPLATES, onDevice, floatT, 3)) \
    IF(BOOL(HALODEPTH_4)) (COMP_LOOP_ALL(INIT_TEMPLATES, onDevice, floatT, 4))

/// 3. Passes double and/or float as floatT, guarded by DOUBLEPREC / SINGLEPREC. __half is not part of this chain.
#define FLOAT_LOOP_ALL(INIT_TEMPLATES, onDevice) \
    IF(BOOL(DOUBLEPREC)) (HALO_LOOP_ALL(INIT_TEMPLATES, onDevice, double)) \
    IF(BOOL(SINGLEPREC)) (HALO_LOOP_ALL(INIT_TEMPLATES, onDevice, float))

/// 2. Passes onDevice: `true` on the NOT(BOOL(NO_GPU)) line, `false` on the BOOL(NO_GPU) line.
#define DEVICE_LOOP_ALL(INIT_TEMPLATES) \
    IF(NOT(BOOL(NO_GPU))) (FLOAT_LOOP_ALL(INIT_TEMPLATES, true)) \
    IF(BOOL(NO_GPU))      (FLOAT_LOOP_ALL(INIT_TEMPLATES, false))

/// 1. Entry point. INIT_TEMPLATES is a macro taking (onDevice, floatT, HALO_DEPTH, compression).
#define INIT_ALL(INIT_TEMPLATES) \
    DEVICE_LOOP_ALL(INIT_TEMPLATES)


/// =================== Initialize Precision and Halos ==========================================
///
/// Simplest chain in this file; read it from the bottom (1.) upwards. IF and BOOL are not defined in this file;
/// presumably they come from preprocessorWrapper.h and drop the guarded text when the switch is 0.

/// 3. ... which is defined here: one INIT_TEMPLATES(floatT, <halo depth>) call per HALODEPTH_n switch.
#define HALO_LOOP_PH(INIT_TEMPLATES, floatT) \
    IF(BOOL(HALODEPTH_0)) (INIT_TEMPLATES(floatT, 0)) \
    IF(BOOL(HALODEPTH_1)) (INIT_TEMPLATES(floatT, 1)) \
    IF(BOOL(HALODEPTH_2)) (INIT_TEMPLATES(floatT, 2)) \
    IF(BOOL(HALODEPTH_3)) (INIT_TEMPLATES(floatT, 3)) \
    IF(BOOL(HALODEPTH_4)) (INIT_TEMPLATES(floatT, 4))

/// 2. FLOAT_LOOP_PH is defined here. It gives double or float to floatT (guarded by DOUBLEPREC / SINGLEPREC),
///    using HALO_LOOP_PH...
#define FLOAT_LOOP_PH(INIT_TEMPLATES) \
    IF(BOOL(DOUBLEPREC)) (HALO_LOOP_PH(INIT_TEMPLATES, double)) \
    IF(BOOL(SINGLEPREC)) (HALO_LOOP_PH(INIT_TEMPLATES, float))

/// 1. INIT_TEMPLATES is passed here from the .cpp file. We define INIT_PH = FLOAT_LOOP_PH.
#define INIT_PH(INIT_TEMPLATES) \
    FLOAT_LOOP_PH(INIT_TEMPLATES)


/// =================== Initialize Halos and half ==========================================
///
/// Half-precision-only variant of the precision/halo chain; read it from the bottom (1.) upwards.

/// 3. ... which is defined here: one INIT_TEMPLATES(floatT, <halo depth>) call per HALODEPTH_n switch.
#define HALO_LOOP_H_HALF(INIT_TEMPLATES, floatT) \
    IF(BOOL(HALODEPTH_0)) (INIT_TEMPLATES(floatT, 0)) \
    IF(BOOL(HALODEPTH_1)) (INIT_TEMPLATES(floatT, 1)) \
    IF(BOOL(HALODEPTH_2)) (INIT_TEMPLATES(floatT, 2)) \
    IF(BOOL(HALODEPTH_3)) (INIT_TEMPLATES(floatT, 3)) \
    IF(BOOL(HALODEPTH_4)) (INIT_TEMPLATES(floatT, 4))

/// 2. FLOAT_LOOP_H_HALF is defined here. It gives only __half to floatT (guarded by HALFPREC),
///    using HALO_LOOP_H_HALF...
#define FLOAT_LOOP_H_HALF(INIT_TEMPLATES) \
    IF(BOOL(HALFPREC)) (HALO_LOOP_H_HALF(INIT_TEMPLATES, __half))

/// 1. INIT_TEMPLATES is passed here from the .cpp file. We define INIT_H_HALF = FLOAT_LOOP_H_HALF.
#define INIT_H_HALF(INIT_TEMPLATES) \
    FLOAT_LOOP_H_HALF(INIT_TEMPLATES)


/// =================== Initialize Precision (incl half) and Halos ==========================================
///
/// Precision/halo chain with __half as an additional precision; read it from the bottom (1.) upwards.

/// 3. ... which is defined here: one INIT_TEMPLATES(floatT, <halo depth>) call per HALODEPTH_n switch.
#define HALO_LOOP_PH_HALF(INIT_TEMPLATES, floatT) \
    IF(BOOL(HALODEPTH_0)) (INIT_TEMPLATES(floatT, 0)) \
    IF(BOOL(HALODEPTH_1)) (INIT_TEMPLATES(floatT, 1)) \
    IF(BOOL(HALODEPTH_2)) (INIT_TEMPLATES(floatT, 2)) \
    IF(BOOL(HALODEPTH_3)) (INIT_TEMPLATES(floatT, 3)) \
    IF(BOOL(HALODEPTH_4)) (INIT_TEMPLATES(floatT, 4))

/// 2. FLOAT_LOOP_PH_HALF is defined here. It gives double, float or __half to floatT (guarded by DOUBLEPREC,
///    SINGLEPREC and HALFPREC), using HALO_LOOP_PH_HALF...
#define FLOAT_LOOP_PH_HALF(INIT_TEMPLATES) \
    IF(BOOL(DOUBLEPREC)) (HALO_LOOP_PH_HALF(INIT_TEMPLATES, double)) \
    IF(BOOL(SINGLEPREC)) (HALO_LOOP_PH_HALF(INIT_TEMPLATES, float)) \
    IF(BOOL(HALFPREC)) (HALO_LOOP_PH_HALF(INIT_TEMPLATES, __half))

/// 1. INIT_TEMPLATES is passed here from the .cpp file. We define INIT_PH_HALF = FLOAT_LOOP_PH_HALF.
#define INIT_PH_HALF(INIT_TEMPLATES) \
    FLOAT_LOOP_PH_HALF(INIT_TEMPLATES)



/// =================== Initialize Precision, Halos and Compression =============================
///
/// Read from the bottom up: INIT_PHC -> FLOAT_LOOP_PHC -> HALO_LOOP_PHC -> COMP_LOOP_PHC.

/// 4. Innermost level: one INIT_TEMPLATES(floatT, HALO_DEPTH, <compression>) call per COMP_* switch.
#define COMP_LOOP_PHC(INIT_TEMPLATES, floatT, HALO_DEPTH) \
    IF(BOOL(COMP_R12))      (INIT_TEMPLATES(floatT, HALO_DEPTH, R12)) \
    IF(BOOL(COMP_STAGGR12)) (INIT_TEMPLATES(floatT, HALO_DEPTH, STAGG_R12)) \
    IF(BOOL(COMP_R14))      (INIT_TEMPLATES(floatT, HALO_DEPTH, R14)) \
    IF(BOOL(COMP_R18))      (INIT_TEMPLATES(floatT, HALO_DEPTH, R18)) \
    IF(BOOL(COMP_U3R14))    (INIT_TEMPLATES(floatT, HALO_DEPTH, U3R14))

/// 3. Passes the halo depth 0..4 to COMP_LOOP_PHC, one line per HALODEPTH_n switch.
#define HALO_LOOP_PHC(INIT_TEMPLATES, floatT) \
    IF(BOOL(HALODEPTH_0)) (COMP_LOOP_PHC(INIT_TEMPLATES, floatT, 0)) \
    IF(BOOL(HALODEPTH_1)) (COMP_LOOP_PHC(INIT_TEMPLATES, floatT, 1)) \
    IF(BOOL(HALODEPTH_2)) (COMP_LOOP_PHC(INIT_TEMPLATES, floatT, 2)) \
    IF(BOOL(HALODEPTH_3)) (COMP_LOOP_PHC(INIT_TEMPLATES, floatT, 3)) \
    IF(BOOL(HALODEPTH_4)) (COMP_LOOP_PHC(INIT_TEMPLATES, floatT, 4))

/// 2. Passes double and/or float as floatT, guarded by DOUBLEPREC / SINGLEPREC.
#define FLOAT_LOOP_PHC(INIT_TEMPLATES) \
    IF(BOOL(DOUBLEPREC)) (HALO_LOOP_PHC(INIT_TEMPLATES, double)) \
    IF(BOOL(SINGLEPREC)) (HALO_LOOP_PHC(INIT_TEMPLATES, float))

/// 1. Entry point. INIT_TEMPLATES is a macro taking (floatT, HALO_DEPTH, compression).
#define INIT_PHC(INIT_TEMPLATES) \
    FLOAT_LOOP_PHC(INIT_TEMPLATES)

/// =================== Initialize Precision (incl half), Halos and Compression =============================
///
/// Read from the bottom up: INIT_PHC_HALF -> FLOAT_LOOP_PHC_HALF -> HALO_LOOP_PHC_HALF -> COMP_LOOP_PHC_HALF.
/// Every level of this chain calls only the *_PHC_HALF helpers defined in this section, so the section does not
/// depend on the non-half PHC macros. IF and BOOL are not defined in this file; presumably they come from
/// preprocessorWrapper.h and drop the guarded text when the switch is 0.

/// 4. Innermost level: one INIT_TEMPLATES(floatT, HALO_DEPTH, <compression>) call per COMP_* switch.
#define COMP_LOOP_PHC_HALF(INIT_TEMPLATES, floatT, HALO_DEPTH) \
    IF(BOOL(COMP_R12))      (INIT_TEMPLATES(floatT, HALO_DEPTH, R12)) \
    IF(BOOL(COMP_STAGGR12)) (INIT_TEMPLATES(floatT, HALO_DEPTH, STAGG_R12)) \
    IF(BOOL(COMP_R14))      (INIT_TEMPLATES(floatT, HALO_DEPTH, R14)) \
    IF(BOOL(COMP_R18))      (INIT_TEMPLATES(floatT, HALO_DEPTH, R18)) \
    IF(BOOL(COMP_U3R14))    (INIT_TEMPLATES(floatT, HALO_DEPTH, U3R14))

/// 3. Passes the halo depth 0..4 to COMP_LOOP_PHC_HALF, one line per HALODEPTH_n switch.
#define HALO_LOOP_PHC_HALF(INIT_TEMPLATES, floatT) \
    IF(BOOL(HALODEPTH_0)) (COMP_LOOP_PHC_HALF(INIT_TEMPLATES, floatT, 0)) \
    IF(BOOL(HALODEPTH_1)) (COMP_LOOP_PHC_HALF(INIT_TEMPLATES, floatT, 1)) \
    IF(BOOL(HALODEPTH_2)) (COMP_LOOP_PHC_HALF(INIT_TEMPLATES, floatT, 2)) \
    IF(BOOL(HALODEPTH_3)) (COMP_LOOP_PHC_HALF(INIT_TEMPLATES, floatT, 3)) \
    IF(BOOL(HALODEPTH_4)) (COMP_LOOP_PHC_HALF(INIT_TEMPLATES, floatT, 4))

/// 2. Passes double, float and/or __half as floatT, guarded by DOUBLEPREC / SINGLEPREC / HALFPREC.
#define FLOAT_LOOP_PHC_HALF(INIT_TEMPLATES) \
    IF(BOOL(DOUBLEPREC)) (HALO_LOOP_PHC_HALF(INIT_TEMPLATES, double)) \
    IF(BOOL(SINGLEPREC)) (HALO_LOOP_PHC_HALF(INIT_TEMPLATES, float)) \
    IF(BOOL(HALFPREC))   (HALO_LOOP_PHC_HALF(INIT_TEMPLATES, __half))

/// 1. Entry point. INIT_TEMPLATES is a macro taking (floatT, HALO_DEPTH, compression).
#define INIT_PHC_HALF(INIT_TEMPLATES) \
    FLOAT_LOOP_PHC_HALF(INIT_TEMPLATES)

/// =================== Initialize Precision, Layout, Halos, Nstacks ============================
///
/// Read from the bottom up: INIT_PLHN -> FLOAT_LOOP_PLHN -> LAYOUT_LOOP_PLHN -> HALO_LOOP_PLHN -> STACK_LOOP_PLHN.

/// 5. Innermost level: one INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, <nstacks>) call per NSTACKS_n switch.
#define STACK_LOOP_PLHN(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH) \
    IF(BOOL(NSTACKS_1))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 1)) \
    IF(BOOL(NSTACKS_2))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 2)) \
    IF(BOOL(NSTACKS_3))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 3)) \
    IF(BOOL(NSTACKS_4))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 4)) \
    IF(BOOL(NSTACKS_5))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 5)) \
    IF(BOOL(NSTACKS_6))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 6)) \
    IF(BOOL(NSTACKS_7))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 7)) \
    IF(BOOL(NSTACKS_8))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 8)) \
    IF(BOOL(NSTACKS_9))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 9)) \
    IF(BOOL(NSTACKS_10))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 10)) \
    IF(BOOL(NSTACKS_11))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 11)) \
    IF(BOOL(NSTACKS_12)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 12)) \
    IF(BOOL(NSTACKS_14)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 14)) \
    IF(BOOL(NSTACKS_15)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 15)) \
    IF(BOOL(NSTACKS_16)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 16)) \
    IF(BOOL(NSTACKS_18)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 18)) \
    IF(BOOL(NSTACKS_20)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 20)) \
    IF(BOOL(NSTACKS_32)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 32))

/// 4. Passes the halo depth 0..4 to STACK_LOOP_PLHN, one line per HALODEPTH_n switch.
#define HALO_LOOP_PLHN(INIT_TEMPLATES, floatT, LAYOUT) \
    IF(BOOL(HALODEPTH_0)) (STACK_LOOP_PLHN(INIT_TEMPLATES, floatT, LAYOUT, 0)) \
    IF(BOOL(HALODEPTH_1)) (STACK_LOOP_PLHN(INIT_TEMPLATES, floatT, LAYOUT, 1)) \
    IF(BOOL(HALODEPTH_2)) (STACK_LOOP_PLHN(INIT_TEMPLATES, floatT, LAYOUT, 2)) \
    IF(BOOL(HALODEPTH_3)) (STACK_LOOP_PLHN(INIT_TEMPLATES, floatT, LAYOUT, 3)) \
    IF(BOOL(HALODEPTH_4)) (STACK_LOOP_PLHN(INIT_TEMPLATES, floatT, LAYOUT, 4))

/// 3. Passes the layout token All / Even / Odd, guarded by LAYOUT_ALL / LAYOUT_EVEN / LAYOUT_ODD.
#define LAYOUT_LOOP_PLHN(INIT_TEMPLATES, floatT) \
    IF(BOOL(LAYOUT_ALL))  (HALO_LOOP_PLHN(INIT_TEMPLATES, floatT, All)) \
    IF(BOOL(LAYOUT_EVEN)) (HALO_LOOP_PLHN(INIT_TEMPLATES, floatT, Even)) \
    IF(BOOL(LAYOUT_ODD))  (HALO_LOOP_PLHN(INIT_TEMPLATES, floatT, Odd))

/// 2. Passes double and/or float as floatT, guarded by DOUBLEPREC / SINGLEPREC.
#define FLOAT_LOOP_PLHN(INIT_TEMPLATES) \
    IF(BOOL(DOUBLEPREC)) (LAYOUT_LOOP_PLHN(INIT_TEMPLATES, double)) \
    IF(BOOL(SINGLEPREC)) (LAYOUT_LOOP_PLHN(INIT_TEMPLATES, float))

/// 1. Entry point. INIT_TEMPLATES is a macro taking (floatT, LAYOUT, HALO_DEPTH, nstacks).
#define INIT_PLHN(INIT_TEMPLATES) \
    FLOAT_LOOP_PLHN(INIT_TEMPLATES)

/// =================== Initialize Precision, Layout, HaloSpin, Nstacks ============================
///
/// Same nesting as INIT_PLHN, except that the halo level loops over the HALODEPTHSPIN_n switches instead of
/// HALODEPTH_n. Read from the bottom up:
/// INIT_PLHSN -> FLOAT_LOOP_PLHSN -> LAYOUT_LOOP_PLHSN -> HALOSPIN_LOOP_PLHSN -> STACK_LOOP_PLHSN.

/// 5. Innermost level: one INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, <nstacks>) call per NSTACKS_n switch.
#define STACK_LOOP_PLHSN(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH) \
    IF(BOOL(NSTACKS_1))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 1)) \
    IF(BOOL(NSTACKS_2))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 2)) \
    IF(BOOL(NSTACKS_3))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 3)) \
    IF(BOOL(NSTACKS_4))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 4)) \
    IF(BOOL(NSTACKS_5))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 5)) \
    IF(BOOL(NSTACKS_6))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 6)) \
    IF(BOOL(NSTACKS_7))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 7)) \
    IF(BOOL(NSTACKS_8))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 8)) \
    IF(BOOL(NSTACKS_9))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 9)) \
    IF(BOOL(NSTACKS_10))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 10)) \
    IF(BOOL(NSTACKS_11))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 11)) \
    IF(BOOL(NSTACKS_12)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 12)) \
    IF(BOOL(NSTACKS_14)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 14)) \
    IF(BOOL(NSTACKS_15)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 15)) \
    IF(BOOL(NSTACKS_16)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 16)) \
    IF(BOOL(NSTACKS_18)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 18)) \
    IF(BOOL(NSTACKS_20)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 20)) \
    IF(BOOL(NSTACKS_32)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 32))

/// 4. Passes the depth 0..4 to STACK_LOOP_PLHSN, one line per HALODEPTHSPIN_n switch.
#define HALOSPIN_LOOP_PLHSN(INIT_TEMPLATES, floatT, LAYOUT) \
    IF(BOOL(HALODEPTHSPIN_0)) (STACK_LOOP_PLHSN(INIT_TEMPLATES, floatT, LAYOUT, 0)) \
    IF(BOOL(HALODEPTHSPIN_1)) (STACK_LOOP_PLHSN(INIT_TEMPLATES, floatT, LAYOUT, 1)) \
    IF(BOOL(HALODEPTHSPIN_2)) (STACK_LOOP_PLHSN(INIT_TEMPLATES, floatT, LAYOUT, 2)) \
    IF(BOOL(HALODEPTHSPIN_3)) (STACK_LOOP_PLHSN(INIT_TEMPLATES, floatT, LAYOUT, 3)) \
    IF(BOOL(HALODEPTHSPIN_4)) (STACK_LOOP_PLHSN(INIT_TEMPLATES, floatT, LAYOUT, 4))

/// 3. Passes the layout token All / Even / Odd, guarded by LAYOUT_ALL / LAYOUT_EVEN / LAYOUT_ODD.
#define LAYOUT_LOOP_PLHSN(INIT_TEMPLATES, floatT) \
    IF(BOOL(LAYOUT_ALL))  (HALOSPIN_LOOP_PLHSN(INIT_TEMPLATES, floatT, All)) \
    IF(BOOL(LAYOUT_EVEN)) (HALOSPIN_LOOP_PLHSN(INIT_TEMPLATES, floatT, Even)) \
    IF(BOOL(LAYOUT_ODD))  (HALOSPIN_LOOP_PLHSN(INIT_TEMPLATES, floatT, Odd))

/// 2. Passes double and/or float as floatT, guarded by DOUBLEPREC / SINGLEPREC.
#define FLOAT_LOOP_PLHSN(INIT_TEMPLATES) \
    IF(BOOL(DOUBLEPREC)) (LAYOUT_LOOP_PLHSN(INIT_TEMPLATES, double)) \
    IF(BOOL(SINGLEPREC)) (LAYOUT_LOOP_PLHSN(INIT_TEMPLATES, float))

/// 1. Entry point. INIT_TEMPLATES is a macro taking (floatT, LAYOUT, HALO_DEPTH, nstacks).
#define INIT_PLHSN(INIT_TEMPLATES) \
    FLOAT_LOOP_PLHSN(INIT_TEMPLATES)

/// =================== Initialize Precision (incl half), Layout, HaloSpin, Nstacks ============================
///
/// Same nesting as INIT_PLHSN with __half as an additional precision. The halo level loops over the
/// HALODEPTHSPIN_n switches. Read from the bottom up: INIT_PLHSN_HALF -> FLOAT_LOOP_PLHSN_HALF ->
/// LAYOUT_LOOP_PLHSN_HALF -> HALOSPIN_LOOP_PLHSN_HALF -> STACK_LOOP_PLHSN_HALF.

/// 5. Innermost level: one INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, <nstacks>) call per NSTACKS_n switch.
#define STACK_LOOP_PLHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH) \
    IF(BOOL(NSTACKS_1))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 1)) \
    IF(BOOL(NSTACKS_2))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 2)) \
    IF(BOOL(NSTACKS_3))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 3)) \
    IF(BOOL(NSTACKS_4))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 4)) \
    IF(BOOL(NSTACKS_5))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 5)) \
    IF(BOOL(NSTACKS_6))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 6)) \
    IF(BOOL(NSTACKS_7))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 7)) \
    IF(BOOL(NSTACKS_8))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 8)) \
    IF(BOOL(NSTACKS_9))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 9)) \
    IF(BOOL(NSTACKS_10))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 10)) \
    IF(BOOL(NSTACKS_11))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 11)) \
    IF(BOOL(NSTACKS_12)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 12)) \
    IF(BOOL(NSTACKS_14)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 14)) \
    IF(BOOL(NSTACKS_15)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 15)) \
    IF(BOOL(NSTACKS_16)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 16)) \
    IF(BOOL(NSTACKS_18)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 18)) \
    IF(BOOL(NSTACKS_20)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 20)) \
    IF(BOOL(NSTACKS_32)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, 32))

/// 4. Passes the depth 0..4 to STACK_LOOP_PLHSN_HALF, one line per HALODEPTHSPIN_n switch.
#define HALOSPIN_LOOP_PLHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT) \
    IF(BOOL(HALODEPTHSPIN_0)) (STACK_LOOP_PLHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT, 0)) \
    IF(BOOL(HALODEPTHSPIN_1)) (STACK_LOOP_PLHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT, 1)) \
    IF(BOOL(HALODEPTHSPIN_2)) (STACK_LOOP_PLHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT, 2)) \
    IF(BOOL(HALODEPTHSPIN_3)) (STACK_LOOP_PLHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT, 3)) \
    IF(BOOL(HALODEPTHSPIN_4)) (STACK_LOOP_PLHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT, 4))

/// 3. Passes the layout token All / Even / Odd, guarded by LAYOUT_ALL / LAYOUT_EVEN / LAYOUT_ODD.
#define LAYOUT_LOOP_PLHSN_HALF(INIT_TEMPLATES, floatT) \
    IF(BOOL(LAYOUT_ALL))  (HALOSPIN_LOOP_PLHSN_HALF(INIT_TEMPLATES, floatT, All)) \
    IF(BOOL(LAYOUT_EVEN)) (HALOSPIN_LOOP_PLHSN_HALF(INIT_TEMPLATES, floatT, Even)) \
    IF(BOOL(LAYOUT_ODD))  (HALOSPIN_LOOP_PLHSN_HALF(INIT_TEMPLATES, floatT, Odd))

/// 2. Passes double, float and/or __half as floatT, guarded by DOUBLEPREC / SINGLEPREC / HALFPREC.
#define FLOAT_LOOP_PLHSN_HALF(INIT_TEMPLATES) \
    IF(BOOL(DOUBLEPREC)) (LAYOUT_LOOP_PLHSN_HALF(INIT_TEMPLATES, double)) \
    IF(BOOL(SINGLEPREC)) (LAYOUT_LOOP_PLHSN_HALF(INIT_TEMPLATES, float)) \
    IF(BOOL(HALFPREC)) (LAYOUT_LOOP_PLHSN_HALF(INIT_TEMPLATES, __half))

/// 1. Entry point. INIT_TEMPLATES is a macro taking (floatT, LAYOUT, HALO_DEPTH, nstacks).
#define INIT_PLHSN_HALF(INIT_TEMPLATES) \
    FLOAT_LOOP_PLHSN_HALF(INIT_TEMPLATES)

/// =================== Initialize Precision, Nstacks ============================
///
/// Read from the bottom up: INIT_PN -> FLOAT_LOOP_PN -> STACK_LOOP_PN.

/// 3. Innermost level: one INIT_TEMPLATES(floatT, <nstacks>) call per NSTACKS_n switch.
#define STACK_LOOP_PN(INIT_TEMPLATES, floatT) \
    IF(BOOL(NSTACKS_1))  (INIT_TEMPLATES(floatT, 1)) \
    IF(BOOL(NSTACKS_2))  (INIT_TEMPLATES(floatT, 2)) \
    IF(BOOL(NSTACKS_3))  (INIT_TEMPLATES(floatT, 3)) \
    IF(BOOL(NSTACKS_4))  (INIT_TEMPLATES(floatT, 4)) \
    IF(BOOL(NSTACKS_5))  (INIT_TEMPLATES(floatT, 5)) \
    IF(BOOL(NSTACKS_6))  (INIT_TEMPLATES(floatT, 6)) \
    IF(BOOL(NSTACKS_7))  (INIT_TEMPLATES(floatT, 7)) \
    IF(BOOL(NSTACKS_8))  (INIT_TEMPLATES(floatT, 8)) \
    IF(BOOL(NSTACKS_9))  (INIT_TEMPLATES(floatT, 9)) \
    IF(BOOL(NSTACKS_10))  (INIT_TEMPLATES(floatT, 10)) \
    IF(BOOL(NSTACKS_11))  (INIT_TEMPLATES(floatT, 11)) \
    IF(BOOL(NSTACKS_12)) (INIT_TEMPLATES(floatT, 12)) \
    IF(BOOL(NSTACKS_14)) (INIT_TEMPLATES(floatT, 14)) \
    IF(BOOL(NSTACKS_15)) (INIT_TEMPLATES(floatT, 15)) \
    IF(BOOL(NSTACKS_16)) (INIT_TEMPLATES(floatT, 16)) \
    IF(BOOL(NSTACKS_18)) (INIT_TEMPLATES(floatT, 18)) \
    IF(BOOL(NSTACKS_20)) (INIT_TEMPLATES(floatT, 20)) \
    IF(BOOL(NSTACKS_32)) (INIT_TEMPLATES(floatT, 32))

/// 2. Passes double and/or float as floatT, guarded by DOUBLEPREC / SINGLEPREC.
#define FLOAT_LOOP_PN(INIT_TEMPLATES) \
    IF(BOOL(DOUBLEPREC)) (STACK_LOOP_PN(INIT_TEMPLATES, double)) \
    IF(BOOL(SINGLEPREC)) (STACK_LOOP_PN(INIT_TEMPLATES, float))

/// 1. Entry point. INIT_TEMPLATES is a macro taking (floatT, nstacks).
#define INIT_PN(INIT_TEMPLATES) \
    FLOAT_LOOP_PN(INIT_TEMPLATES)

/// =================== Initialize half Precision, Nstacks ============================
///
/// Half-precision-only counterpart of INIT_PN. Read from the bottom up:
/// INIT_PN_HALF -> FLOAT_LOOP_PN_HALF -> STACK_LOOP_PN_HALF.
/// IF and BOOL are not defined in this file; presumably they come from preprocessorWrapper.h and drop the
/// guarded text when the switch is 0.

/// 3. Innermost level: one INIT_TEMPLATES(floatT, <nstacks>) call per NSTACKS_n switch.
#define STACK_LOOP_PN_HALF(INIT_TEMPLATES, floatT) \
    IF(BOOL(NSTACKS_1))  (INIT_TEMPLATES(floatT, 1)) \
    IF(BOOL(NSTACKS_2))  (INIT_TEMPLATES(floatT, 2)) \
    IF(BOOL(NSTACKS_3))  (INIT_TEMPLATES(floatT, 3)) \
    IF(BOOL(NSTACKS_4))  (INIT_TEMPLATES(floatT, 4)) \
    IF(BOOL(NSTACKS_5))  (INIT_TEMPLATES(floatT, 5)) \
    IF(BOOL(NSTACKS_6))  (INIT_TEMPLATES(floatT, 6)) \
    IF(BOOL(NSTACKS_7))  (INIT_TEMPLATES(floatT, 7)) \
    IF(BOOL(NSTACKS_8))  (INIT_TEMPLATES(floatT, 8)) \
    IF(BOOL(NSTACKS_9))  (INIT_TEMPLATES(floatT, 9)) \
    IF(BOOL(NSTACKS_10)) (INIT_TEMPLATES(floatT, 10)) \
    IF(BOOL(NSTACKS_11)) (INIT_TEMPLATES(floatT, 11)) \
    IF(BOOL(NSTACKS_12)) (INIT_TEMPLATES(floatT, 12)) \
    IF(BOOL(NSTACKS_14)) (INIT_TEMPLATES(floatT, 14)) \
    IF(BOOL(NSTACKS_15)) (INIT_TEMPLATES(floatT, 15)) \
    IF(BOOL(NSTACKS_16)) (INIT_TEMPLATES(floatT, 16)) \
    IF(BOOL(NSTACKS_18)) (INIT_TEMPLATES(floatT, 18)) \
    IF(BOOL(NSTACKS_20)) (INIT_TEMPLATES(floatT, 20)) \
    IF(BOOL(NSTACKS_32)) (INIT_TEMPLATES(floatT, 32))

/// 2. Passes __half as floatT, guarded by HALFPREC. HALFPREC is the half-precision switch that this file
///    defaults to 0 and that every other *_HALF chain in this file tests; the previous guard tested HALF,
///    which is not defined anywhere in this file, so it did not follow the HALFPREC switch.
#define FLOAT_LOOP_PN_HALF(INIT_TEMPLATES) \
    IF(BOOL(HALFPREC)) (STACK_LOOP_PN_HALF(INIT_TEMPLATES, __half))

/// 1. Entry point. INIT_TEMPLATES is a macro taking (floatT, nstacks).
#define INIT_PN_HALF(INIT_TEMPLATES) \
    FLOAT_LOOP_PN_HALF(INIT_TEMPLATES)

/// ================== Initialize Precision, Halo, HaloSpin ====================
///
/// Read from the bottom up: INIT_PHHS -> FLOAT_LOOP_PHHS -> HALO_LOOP_PHHS -> HALOSPIN_LOOP_PHHS.

/// 4. Innermost level: one INIT_TEMPLATES(floatT, Halo, <spin halo depth>) call per HALODEPTHSPIN_n switch.
#define HALOSPIN_LOOP_PHHS(INIT_TEMPLATES, floatT, Halo) \
    IF(BOOL(HALODEPTHSPIN_0)) (INIT_TEMPLATES(floatT, Halo, 0)) \
    IF(BOOL(HALODEPTHSPIN_1)) (INIT_TEMPLATES(floatT, Halo, 1)) \
    IF(BOOL(HALODEPTHSPIN_2)) (INIT_TEMPLATES(floatT, Halo, 2)) \
    IF(BOOL(HALODEPTHSPIN_3)) (INIT_TEMPLATES(floatT, Halo, 3)) \
    IF(BOOL(HALODEPTHSPIN_4)) (INIT_TEMPLATES(floatT, Halo, 4))

/// 3. Passes the halo depth 0..4 to HALOSPIN_LOOP_PHHS, one line per HALODEPTH_n switch.
#define HALO_LOOP_PHHS(INIT_TEMPLATES, floatT) \
    IF(BOOL(HALODEPTH_0)) (HALOSPIN_LOOP_PHHS(INIT_TEMPLATES, floatT, 0)) \
    IF(BOOL(HALODEPTH_1)) (HALOSPIN_LOOP_PHHS(INIT_TEMPLATES, floatT, 1)) \
    IF(BOOL(HALODEPTH_2)) (HALOSPIN_LOOP_PHHS(INIT_TEMPLATES, floatT, 2)) \
    IF(BOOL(HALODEPTH_3)) (HALOSPIN_LOOP_PHHS(INIT_TEMPLATES, floatT, 3)) \
    IF(BOOL(HALODEPTH_4)) (HALOSPIN_LOOP_PHHS(INIT_TEMPLATES, floatT, 4))

/// 2. Passes double and/or float as floatT, guarded by DOUBLEPREC / SINGLEPREC.
#define FLOAT_LOOP_PHHS(INIT_TEMPLATES) \
    IF(BOOL(DOUBLEPREC)) (HALO_LOOP_PHHS(INIT_TEMPLATES, double)) \
    IF(BOOL(SINGLEPREC)) (HALO_LOOP_PHHS(INIT_TEMPLATES, float))

/// 1. Entry point. INIT_TEMPLATES is a macro taking (floatT, halo depth, spin halo depth).
#define INIT_PHHS(INIT_TEMPLATES) \
    FLOAT_LOOP_PHHS(INIT_TEMPLATES)

/// =================== Initialize Precision, Layout, Halos, HaloSpin, Nstacks ============================
///
/// Read from the bottom up: INIT_PLHHSN -> FLOAT_LOOP_PLHHSN -> LAYOUT_LOOP_PLHHSN -> HALO_LOOP_PLHHSN ->
/// HALOSPIN_LOOP_PLHHSN -> STACK_LOOP_PLHHSN.

/// 6. Innermost level: one INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, <nstacks>) call per
///    NSTACKS_n switch.
#define STACK_LOOP_PLHHSN(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN)	\
    IF(BOOL(NSTACKS_1))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 1)) \
    IF(BOOL(NSTACKS_2))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 2)) \
    IF(BOOL(NSTACKS_3))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 3)) \
    IF(BOOL(NSTACKS_4))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 4)) \
    IF(BOOL(NSTACKS_5))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 5)) \
    IF(BOOL(NSTACKS_6))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 6)) \
    IF(BOOL(NSTACKS_7))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 7)) \
    IF(BOOL(NSTACKS_8))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 8)) \
    IF(BOOL(NSTACKS_9))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 9)) \
    IF(BOOL(NSTACKS_10))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 10)) \
    IF(BOOL(NSTACKS_11))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 11)) \
    IF(BOOL(NSTACKS_12)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 12)) \
    IF(BOOL(NSTACKS_14)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 14)) \
    IF(BOOL(NSTACKS_15)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 15)) \
    IF(BOOL(NSTACKS_16)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 16)) \
    IF(BOOL(NSTACKS_18)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 18)) \
    IF(BOOL(NSTACKS_20)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 20)) \
    IF(BOOL(NSTACKS_32)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 32))

/// 5. Passes the spin halo depth 0..4 to STACK_LOOP_PLHHSN, one line per HALODEPTHSPIN_n switch.
#define HALOSPIN_LOOP_PLHHSN(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH) \
  IF(BOOL(HALODEPTHSPIN_0)) (STACK_LOOP_PLHHSN(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, 0)) \
  IF(BOOL(HALODEPTHSPIN_1)) (STACK_LOOP_PLHHSN(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, 1)) \
  IF(BOOL(HALODEPTHSPIN_2)) (STACK_LOOP_PLHHSN(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, 2)) \
  IF(BOOL(HALODEPTHSPIN_3)) (STACK_LOOP_PLHHSN(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, 3)) \
  IF(BOOL(HALODEPTHSPIN_4)) (STACK_LOOP_PLHHSN(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, 4))


/// 4. Passes the halo depth 0..4 to HALOSPIN_LOOP_PLHHSN, one line per HALODEPTH_n switch.
#define HALO_LOOP_PLHHSN(INIT_TEMPLATES, floatT, LAYOUT) \
    IF(BOOL(HALODEPTH_0)) (HALOSPIN_LOOP_PLHHSN(INIT_TEMPLATES, floatT, LAYOUT, 0)) \
    IF(BOOL(HALODEPTH_1)) (HALOSPIN_LOOP_PLHHSN(INIT_TEMPLATES, floatT, LAYOUT, 1)) \
    IF(BOOL(HALODEPTH_2)) (HALOSPIN_LOOP_PLHHSN(INIT_TEMPLATES, floatT, LAYOUT, 2)) \
    IF(BOOL(HALODEPTH_3)) (HALOSPIN_LOOP_PLHHSN(INIT_TEMPLATES, floatT, LAYOUT, 3)) \
    IF(BOOL(HALODEPTH_4)) (HALOSPIN_LOOP_PLHHSN(INIT_TEMPLATES, floatT, LAYOUT, 4))

/// 3. Passes the layout token All / Even / Odd, guarded by LAYOUT_ALL / LAYOUT_EVEN / LAYOUT_ODD.
#define LAYOUT_LOOP_PLHHSN(INIT_TEMPLATES, floatT) \
    IF(BOOL(LAYOUT_ALL))  (HALO_LOOP_PLHHSN(INIT_TEMPLATES, floatT, All)) \
    IF(BOOL(LAYOUT_EVEN)) (HALO_LOOP_PLHHSN(INIT_TEMPLATES, floatT, Even)) \
    IF(BOOL(LAYOUT_ODD))  (HALO_LOOP_PLHHSN(INIT_TEMPLATES, floatT, Odd))

/// 2. Passes double and/or float as floatT, guarded by DOUBLEPREC / SINGLEPREC.
#define FLOAT_LOOP_PLHHSN(INIT_TEMPLATES) \
    IF(BOOL(DOUBLEPREC)) (LAYOUT_LOOP_PLHHSN(INIT_TEMPLATES, double)) \
    IF(BOOL(SINGLEPREC)) (LAYOUT_LOOP_PLHHSN(INIT_TEMPLATES, float))

/// 1. Entry point. INIT_TEMPLATES is a macro taking (floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, nstacks).
#define INIT_PLHHSN(INIT_TEMPLATES) \
    FLOAT_LOOP_PLHHSN(INIT_TEMPLATES)



/// =================== Initialize Precision + half, Layout, Halos, HaloSpin, Nstacks ============================
///
/// Same nesting as INIT_PLHHSN with __half as an additional precision. Read from the bottom up:
/// INIT_PLHHSN_HALF -> FLOAT_LOOP_PLHHSN_HALF -> LAYOUT_LOOP_PLHHSN_HALF -> HALO_LOOP_PLHHSN_HALF ->
/// HALOSPIN_LOOP_PLHHSN_HALF -> STACK_LOOP_PLHHSN_HALF.

/// 6. Innermost level: one INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, <nstacks>) call per
///    NSTACKS_n switch.
#define STACK_LOOP_PLHHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN)	\
    IF(BOOL(NSTACKS_1))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 1)) \
    IF(BOOL(NSTACKS_2))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 2)) \
    IF(BOOL(NSTACKS_3))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 3)) \
    IF(BOOL(NSTACKS_4))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 4)) \
    IF(BOOL(NSTACKS_5))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 5)) \
    IF(BOOL(NSTACKS_6))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 6)) \
    IF(BOOL(NSTACKS_7))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 7)) \
    IF(BOOL(NSTACKS_8))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 8)) \
    IF(BOOL(NSTACKS_9))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 9)) \
    IF(BOOL(NSTACKS_10))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 10)) \
    IF(BOOL(NSTACKS_11))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 11)) \
    IF(BOOL(NSTACKS_12)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 12)) \
    IF(BOOL(NSTACKS_14)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 14)) \
    IF(BOOL(NSTACKS_15)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 15)) \
    IF(BOOL(NSTACKS_16)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 16)) \
    IF(BOOL(NSTACKS_18)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 18)) \
    IF(BOOL(NSTACKS_20)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 20)) \
    IF(BOOL(NSTACKS_32)) (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 32))

/// 5. Passes the spin halo depth 0..4 to STACK_LOOP_PLHHSN_HALF, one line per HALODEPTHSPIN_n switch.
#define HALOSPIN_LOOP_PLHHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH) \
  IF(BOOL(HALODEPTHSPIN_0)) (STACK_LOOP_PLHHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, 0)) \
  IF(BOOL(HALODEPTHSPIN_1)) (STACK_LOOP_PLHHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, 1)) \
  IF(BOOL(HALODEPTHSPIN_2)) (STACK_LOOP_PLHHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, 2)) \
  IF(BOOL(HALODEPTHSPIN_3)) (STACK_LOOP_PLHHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, 3)) \
  IF(BOOL(HALODEPTHSPIN_4)) (STACK_LOOP_PLHHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, 4))


/// 4. Passes the halo depth 0..4 to HALOSPIN_LOOP_PLHHSN_HALF, one line per HALODEPTH_n switch.
#define HALO_LOOP_PLHHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT) \
    IF(BOOL(HALODEPTH_0)) (HALOSPIN_LOOP_PLHHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT, 0)) \
    IF(BOOL(HALODEPTH_1)) (HALOSPIN_LOOP_PLHHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT, 1)) \
    IF(BOOL(HALODEPTH_2)) (HALOSPIN_LOOP_PLHHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT, 2)) \
    IF(BOOL(HALODEPTH_3)) (HALOSPIN_LOOP_PLHHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT, 3)) \
    IF(BOOL(HALODEPTH_4)) (HALOSPIN_LOOP_PLHHSN_HALF(INIT_TEMPLATES, floatT, LAYOUT, 4))

/// 3. Passes the layout token All / Even / Odd, guarded by LAYOUT_ALL / LAYOUT_EVEN / LAYOUT_ODD.
#define LAYOUT_LOOP_PLHHSN_HALF(INIT_TEMPLATES, floatT) \
    IF(BOOL(LAYOUT_ALL))  (HALO_LOOP_PLHHSN_HALF(INIT_TEMPLATES, floatT, All)) \
    IF(BOOL(LAYOUT_EVEN)) (HALO_LOOP_PLHHSN_HALF(INIT_TEMPLATES, floatT, Even)) \
    IF(BOOL(LAYOUT_ODD))  (HALO_LOOP_PLHHSN_HALF(INIT_TEMPLATES, floatT, Odd))

/// 2. Passes double, float and/or __half as floatT, guarded by DOUBLEPREC / SINGLEPREC / HALFPREC.
#define FLOAT_LOOP_PLHHSN_HALF(INIT_TEMPLATES) \
    IF(BOOL(DOUBLEPREC)) (LAYOUT_LOOP_PLHHSN_HALF(INIT_TEMPLATES, double)) \
    IF(BOOL(SINGLEPREC)) (LAYOUT_LOOP_PLHHSN_HALF(INIT_TEMPLATES, float)) \
    IF(BOOL(HALFPREC)) (LAYOUT_LOOP_PLHHSN_HALF(INIT_TEMPLATES, __half))

/// 1. Entry point. INIT_TEMPLATES is a macro taking (floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, nstacks).
#define INIT_PLHHSN_HALF(INIT_TEMPLATES) \
    FLOAT_LOOP_PLHHSN_HALF(INIT_TEMPLATES)



/// =================== Initialize Precision + half, Layout, Halos, HaloSpin, Nstacks, Nstacks_blockdim ============================
///
/// Read from the bottom up: INIT_PLHHSNNB_HALF -> FLOAT_LOOP_PLHHSNNB_HALF -> LAYOUT_LOOP_PLHHSNNB_HALF ->
/// HALO_LOOP_PLHHSNNB_HALF -> HALOSPIN_LOOP_PLHHSNNB_HALF -> STACK_LOOP_PLHHSNNB_HALF ->
/// INNER_STACK_LOOP_PLHHSNNB_HALF.

/// 7. Innermost level: one INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, NSTACKS, <block>) call
///    per NSTACKS_BLOCK_n switch. For n >= 2 the guard text is `NSTACKS_BLOCK_n && !(NSTACKS % n)`, which
///    reads as "switch enabled and NSTACKS divisible by n".
///    NOTE(review): here BOOL receives an expression instead of a single 0/1 token. Whether the divisibility
///    part is actually evaluated depends on how BOOL is implemented in preprocessorWrapper.h (not visible in
///    this file) -- confirm that non-divisible (NSTACKS, block) combinations are really skipped.
#define INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, NSTACKS) \
    IF(BOOL(NSTACKS_BLOCK_1))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, NSTACKS, 1)) \
    IF(BOOL(NSTACKS_BLOCK_2 && !(NSTACKS % 2)))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, NSTACKS, 2)) \
    IF(BOOL(NSTACKS_BLOCK_3 && !(NSTACKS % 3)))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, NSTACKS, 3)) \
    IF(BOOL(NSTACKS_BLOCK_4 && !(NSTACKS % 4)))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, NSTACKS, 4)) \
    IF(BOOL(NSTACKS_BLOCK_5 && !(NSTACKS % 5)))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, NSTACKS, 5)) \
    IF(BOOL(NSTACKS_BLOCK_6 && !(NSTACKS % 6)))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, NSTACKS, 6)) \
    IF(BOOL(NSTACKS_BLOCK_7 && !(NSTACKS % 7)))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, NSTACKS, 7)) \
    IF(BOOL(NSTACKS_BLOCK_8 && !(NSTACKS % 8)))  (INIT_TEMPLATES(floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, NSTACKS, 8)) 

/// 6. Passes the stack count to INNER_STACK_LOOP_PLHHSNNB_HALF, one line per NSTACKS_n switch.
#define STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN)	\
    IF(BOOL(NSTACKS_1))  (INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 1)) \
    IF(BOOL(NSTACKS_2))  (INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 2)) \
    IF(BOOL(NSTACKS_3))  (INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 3)) \
    IF(BOOL(NSTACKS_4))  (INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 4)) \
    IF(BOOL(NSTACKS_5))  (INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 5)) \
    IF(BOOL(NSTACKS_6))  (INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 6)) \
    IF(BOOL(NSTACKS_7))  (INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 7)) \
    IF(BOOL(NSTACKS_8))  (INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 8)) \
    IF(BOOL(NSTACKS_9))  (INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 9)) \
    IF(BOOL(NSTACKS_10)) (INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 10)) \
    IF(BOOL(NSTACKS_11)) (INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 11)) \
    IF(BOOL(NSTACKS_12)) (INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 12)) \
    IF(BOOL(NSTACKS_14)) (INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 14)) \
    IF(BOOL(NSTACKS_15)) (INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 15)) \
    IF(BOOL(NSTACKS_16)) (INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 16)) \
    IF(BOOL(NSTACKS_18)) (INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 18)) \
    IF(BOOL(NSTACKS_20)) (INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 20)) \
    IF(BOOL(NSTACKS_32)) (INNER_STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, 32))

/// 5. Passes the spin halo depth 0..4 to STACK_LOOP_PLHHSNNB_HALF, one line per HALODEPTHSPIN_n switch.
#define HALOSPIN_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH) \
  IF(BOOL(HALODEPTHSPIN_0)) (STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, 0)) \
  IF(BOOL(HALODEPTHSPIN_1)) (STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, 1)) \
  IF(BOOL(HALODEPTHSPIN_2)) (STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, 2)) \
  IF(BOOL(HALODEPTHSPIN_3)) (STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, 3)) \
  IF(BOOL(HALODEPTHSPIN_4)) (STACK_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, HALO_DEPTH, 4))


/// 4. Passes the halo depth 0..4 to HALOSPIN_LOOP_PLHHSNNB_HALF, one line per HALODEPTH_n switch.
#define HALO_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT) \
    IF(BOOL(HALODEPTH_0)) (HALOSPIN_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, 0)) \
    IF(BOOL(HALODEPTH_1)) (HALOSPIN_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, 1)) \
    IF(BOOL(HALODEPTH_2)) (HALOSPIN_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, 2)) \
    IF(BOOL(HALODEPTH_3)) (HALOSPIN_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, 3)) \
    IF(BOOL(HALODEPTH_4)) (HALOSPIN_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, LAYOUT, 4))

/// 3. Passes the layout token All / Even / Odd, guarded by LAYOUT_ALL / LAYOUT_EVEN / LAYOUT_ODD.
#define LAYOUT_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT) \
    IF(BOOL(LAYOUT_ALL))  (HALO_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, All)) \
    IF(BOOL(LAYOUT_EVEN)) (HALO_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, Even)) \
    IF(BOOL(LAYOUT_ODD))  (HALO_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, floatT, Odd))

/// 2. Passes double, float and/or __half as floatT, guarded by DOUBLEPREC / SINGLEPREC / HALFPREC.
#define FLOAT_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES) \
    IF(BOOL(DOUBLEPREC)) (LAYOUT_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, double)) \
    IF(BOOL(SINGLEPREC)) (LAYOUT_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, float)) \
    IF(BOOL(HALFPREC)) (LAYOUT_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES, __half))

/// 1. Entry point. INIT_TEMPLATES is a macro taking
///    (floatT, LAYOUT, HALO_DEPTH, HALO_DEPTH_SPIN, nstacks, nstacks block).
#define INIT_PLHHSNNB_HALF(INIT_TEMPLATES) \
    FLOAT_LOOP_PLHHSNNB_HALF(INIT_TEMPLATES)
